* do run2_prepare_station_data.do

	****************************************
	* SPECIFY PATHS
	****************************************

* Root of the project tree, relative to the directory the do-file is run from.
* No trailing ";" here: this do-file never switches to "#delimit ;", so a
* semicolon is not a statement terminator and would otherwise be handed to
* the macro definition instead of being ignored.
global path="./../"

* Sub-folders derived from the root: generated data, raw inputs, figures.
global data_output="${path}output/"
global data_input="${path}input/"
global data_results="${path}results/"

* Folder prefix prepended to county2014_database.dta further below; must be
* filled in by the user (include the trailing slash).
global folder_maptile="" /* specify folder name */

	****************************************
	* PREPARE POPULATION DATA 
	****************************************
 
use ${data_input}county_population.dta, clear

* Retain only rows whose FIPS string has exactly five characters
* (presumably the county-level rows -- confirm against the input file).
keep if length(fipsco)==5

* Discard the decade-specific population columns (1970s through 2000s).
drop pop197? pop198? pop199? pop200?

* Numeric copy of the FIPS string, used as the merge key later on.
generate county=fipsco
destring county, replace

keep county pop2014 state_fips state_name

* Stored sorted on county because it is used as the "using" file of
* match-merges on county.
tempfile county_pop2014
sort county
save `county_pop2014', replace

	****************************************
	* PREPARE ZIP TO ZCTA FILE
	****************************************

insheet using ${data_input}Zip_to_ZCTA_crosswalk_2015_JSI.csv, clear

* Only the zip -> zcta mapping is carried forward.
keep zip zcta
quietly compress

* Stored sorted on zip because it is the "using" file of a match-merge on zip.
sort zip
tempfile zip_zcta
save `zip_zcta', replace

	****************************************
	* PREPARE COUNTY NAME FILE (2015 GAZETTEER)
	****************************************
 
import delimited using ${data_input}2015_Gaz_counties_national.txt, clear

* Keep the county identifier, its name and the usps code; the name column is
* renamed so it does not clash with other "name" variables after merging.
keep geoid name usps
rename name county_name

tempfile county_file2
quietly compress
save `county_file2', replace

	****************************************
	* PREPARE ZCTA TO COUNTY WEIGHTS (WIDE, ONE ROW PER ZCTA)
	****************************************
 
import delimited ${data_input}zcta_county_rel_10.txt, clear

* Recode county 46113 to 46102 before matching against the county name file.
* NOTE(review): code further below maps 2158 back to 2270, but 2270 is not
* recoded to 2158 here -- confirm whether rows with geoid 2270 are meant to
* survive the "keep if _merge==3" step.
replace geoid=46102 if geoid==46113

* Remember the original row order of the relationship file.
generate nn_all=_n

* Every (zcta5, geoid) pair must occur exactly once.
isid zcta5 geoid

* nn: running index of the row within its ZCTA (in file order);
* NN: number of rows of that ZCTA.
bysort zcta5 (nn_all): generate nn=_n
bysort zcta5 (nn_all): generate NN=_N

* Attach county names; rows without a match in the county file are dropped.
sort geoid
merge geoid using `county_file2', keep(county_name) uniqusing sort nokeep
tabulate _merge
keep if _merge==3
drop _merge

keep zcta5 geoid zpoppct county_name nn NN

* zpoppct is divided by 100 to turn a percentage into a fraction.
generate weight=zpoppct/100
generate weight_old=weight

* A ZCTA with a single row and zero percentage gets full weight on that row.
replace weight=1 if weight==0 & nn==1 & NN==1

* Rescale so that the weights of each ZCTA add up to one (where possible).
bysort zcta5 (geoid): egen check=sum(weight)
replace weight=weight/check if check>0

drop check weight_old zpoppct NN

* One row per ZCTA with columns weight1, geoid1, county_name1, weight2, ...
reshape wide weight geoid county_name, i(zcta5) j(nn) 

* Stored sorted on zcta5 because it is the "using" file of a match-merge.
tempfile zcta5_county
sort zcta5
quietly compress
save `zcta5_county', replace

	****************************************
	* PREPARE COMBINED STATA FILE
	****************************************
 
 tempfile file1 file2 file3 file4

 * The four text files are read separately and stitched together by row
 * number (n), so each import records its own row index.

 * File 1: day, price, state, uc and when columns.
 import delimited using ${data_output}data.txt, delimiter(tab) clear
 rename (v1 v2 v3 v4 v5) (day price_final state uc when_final)
 generate n=_n
 quietly compress
 save `file1'

 * File 2: the "uno" string column (read as UTF-8).
 import delimited using ${data_output}data_string_uno.txt, delimiter(tab) encoding("utf-8") clear
 rename v1 uno_final
 generate n=_n
 quietly compress
 save `file2'

 * File 3: the zip column.
 import delimited using ${data_output}data_string_zip.txt, delimiter(tab) clear
 rename v1 zip_final
 generate n=_n
 quietly compress
 save `file3'

 * File 4: the state-name column.
 import delimited using ${data_output}data_string_state_names.txt, delimiter(tab) clear
 rename v1 state_names
 generate n=_n
 quietly compress
 save `file4'

 * Start from file 1 and attach files 2-4 one-to-one on n; every row must
 * be present on both sides of each merge.
 use `file1', clear
 sort n
 forvalues k = 2/4 {
	merge n using `file`k'', unique sort
	assert _merge==3
	drop _merge
}

* The script requires zip_final to be stored as float at this point.
local zip_type: type zip_final
assert ("`zip_type'"=="float")

quietly compress

	****************************************
	* UPDATE ZIP CODES
	****************************************

* ZIP recodes, written as old:new. No new value also appears as an old
* value, so the pairs can be applied in any order.
foreach pair in ///
	31717:39819 43602:43604 43618:43616 44178:44115 ///
	45408:45417 45427:45417 50706:50702 ///
	85219:85119 85220:85120 85222:85120 85231:85131 ///
	85232:85132 85239:85138 85241:85141 85243:85142 ///
	85247:85147 85272:85172 85273:85173 85292:85192 {

	* Split "old:new" into its two halves (the second keeps the ":" at first).
	gettoken old_zip new_zip : pair, parse(":")
	local new_zip : subinstr local new_zip ":" ""

	replace zip_final=`new_zip' if zip_final==`old_zip'
}

* Map each observation's zip to a ZCTA. Old-style match-merge: the master is
* sorted on zip here and the using file was saved sorted on zip. nokeep
* discards using-only rows; only zcta is brought in.
* NOTE(review): unlike the zcta5 merge below, this merge does not specify
* uniqusing, so duplicate zips in the crosswalk would not raise an error --
* confirm the crosswalk is unique on zip.
gen zip=zip_final
sort zip
merge zip using `zip_zcta', nokeep keep(zcta)
* Match rates, overall and among observations with a non-missing zip.
tab _merge
tab _merge if zip!=.
drop _merge

* Attach the wide county columns (geoid1, weight1, county_name1, ...) of
* each ZCTA; the using file must be unique on zcta5.
gen zcta5=zcta
sort zcta5
merge zcta5 using `zcta5_county', nokeep uniqusing keep(county_name? weight? geoid?)
tab _merge
* The only unmatched master rows allowed are those without a ZCTA.
assert zcta5==. if _merge==1
drop _merge

* Keep price, day, the per-county columns, the row id and the state variables.
keep price_final day geoid? county_name? weight? n state state_names

	****************************************
	* CHANGE IN LONG FORMAT
	****************************************
 
* One row per (observation n, county slot jj).
reshape long geoid county_name weight, i(n) j(jj)

* Slots with zero or missing weight carry no county for this observation.
drop if inlist(weight, 0, .)

* Leading digits of the county code (geoid divided by 1000, rounded down).
generate state_new=floor(geoid/1000)

quietly compress

	****************************************
	* EXTRACT RESIDUALS
	****************************************
 
* Log price; negative logs (i.e. prices below one) are set to missing.
generate ln_price=log(price_final) 
replace ln_price=. if ln_price<0

* One dummy per distinct value of day.
quietly tabulate day, generate(day_d)

* Weighted regression of log price on the day dummies, absorbing a fixed
* effect for each geoid.
areg ln_price day_d* [aw=weight], absorb(geoid)

* Residuals, for the estimation sample only.
predict uhat if e(sample), resid

	*

quietly compress 

	****************************************
	* COUNTY-LEVEL DISPERSION OF RESIDUALS
	****************************************
 
* The collapse is done on a copy; the observation-level data are restored
* afterwards.
preserve

	* Per geoid: weighted standard deviation of uhat, count of uhat, and the
	* raw (unnormalised) sum of the weights.
	collapse (sd) sd_uhat=uhat (count) N=uhat (rawsum) sumweight=weight [aw=weight], by(geoid)

	* Attach county name and usps code. Counties that appear only in the
	* county file are kept as rows (no nokeep), except for the ones below.
	sort geoid
	merge geoid using `county_file2', keep(county_name usps) uniqusing sort
	tabulate _merge

		* DROP PUERTO RICO 
		
	drop if _merge==2 & usps=="PR"
	drop _merge

	save ${data_output}sd_county_areg.dta, replace

restore

	****************************************
	* KEEP HIGHEST-WEIGHT COUNTY PER OBSERVATION; PLOT HISTOGRAM
	****************************************
 
drop if state_names=="AK" | state_names=="HI"

* Each (n, jj) pair identifies exactly one row of the long data.
isid n jj

* Within each n, order rows by descending weight. jj is added as a final
* sort key: it is unique within n (see isid above), so rows with equal
* weight are ordered deterministically instead of in whatever order the
* sort happens to leave ties, which made the retained row (and hence its
* uhat and geoid) irreproducible.
gsort + n - weight + jj
gen aux=_n

* check_n==1 marks the first (highest-weight) row of each n.
bys n (aux): gen check_n=_n

keep if check_n==1

* From here on there is one row per n.
isid n
sort n

qui compress

	* PLOT HISTOGRAM 

* Histogram of residuals within +/-0.2. Graph commands do not modify the
* data in memory, so no preserve/restore is needed around them.
graph twoway ///
	(hist uhat if abs(uhat)<=0.2, lcolor(gs4) fcolor(gs10) lwidth(vthin)), ///
	scheme(s2mono) ///
	name(hist_within, replace)

graph export ${data_results}figure-2.pdf, name(hist_within) replace 

	****************************************
	* RANDOM SAMPLE OF 1000 RESIDUALS
	****************************************
 
* Works on a copy of the data; the full dataset is restored afterwards.
preserve

	* Fixed seed so that the same rows are drawn on every run.
	set seed 12345

	keep uhat 
	rename uhat eps_vec

	* Shuffle: sort on a freshly drawn standard-normal variable, then take
	* the first 1000 rows.
	* NOTE(review): missing residuals are not filtered out before sampling,
	* and "keep in 1/1000" errors out with fewer than 1000 rows -- confirm
	* both are intended.
	drawnorm aux
	sort aux
	drop aux

	keep in 1/1000
		
	export delimited using "${data_output}station_data_eps_vec_N1000.csv", replace delimiter(";") novarnames

restore

	****************************************
	* SAMPLE, BY STATE
	****************************************

	* MERGE IN STATE INFO

generate county=geoid 

	* ADJUSTMENTS TO COUNTY CODES
	* (46102 -> 46113 and 2158 -> 2270 before matching on county; presumably
	*  the population file uses these codes -- confirm)
		
replace county=46113 if county==46102 
replace county=2270 if county==2158 

* Attach state identifiers; every observation must find its county.
sort county 
merge county using `county_pop2014', keep(state_fips state_name) nokeep
tabulate _merge
assert _merge==3
drop _merge

quietly compress

	* Attach the state_draw_d indicator by observation id; master and using
	* must match one-to-one with no unmatched rows on either side.

sort n
merge n using ${data_input}state_draw, keep(state_draw_d) unique
assert _merge==3
drop _merge

	* Summary statistics of the residuals before trimming.

summarize uhat

	* Keep residuals within +/-0.2 (rows with missing uhat fail this test)
	* that are flagged by state_draw_d.

keep if abs(uhat)<=0.2 & state_draw_d==1
drop state_draw_d

keep state_name state_fips uhat

rename uhat eps_vec

* Two columns, no header: state_fips, eps_vec.
export delimited state_fips eps_vec using "${data_output}station_data_eps_vec_state.csv", replace delimiter(",") novarnames

	* 

* Reload the county-level dispersion file written earlier in this script.
use ${data_output}sd_county_areg.dta, clear

generate county=geoid 

	* ADJUSTMENTS RE COUNTY CODES
	* (same two recodes as applied before the population merge above)
	
replace county=46113 if county==46102 
replace county=2270 if county==2158 

* Every county must be present in the maptile county database ...
sort county
merge county using "${folder_maptile}county2014_database.dta", keep() sort
tabulate _merge
assert _merge==3
drop _merge 

* ... and in the population file, from which pop2014 is attached.
sort county 
merge county using `county_pop2014', keep(pop2014)
tabulate _merge
assert _merge==3
drop _merge

drop if inlist(usps, "AK", "HI")

* County map of sd_uhat in ten quantile bins.
maptile sd_uhat, geography(county2014) stateoutline(medium) nquantiles(10) ndfcolor(gs16) rangecolor(gs14 gs2)  ///
	conus ///
	spopt(legend(size(small))) ///
	savegraph("${data_results}figure-1.pdf") replace

* Unweighted histogram of sd_uhat across counties, 30 bins.
graph twoway (hist sd_uhat, lcolor(gs4) fcolor(gs10) lwidth(vthin) bin(30)), name(hist_unweighted, replace) ///
	scheme(s2mono) ///
	xtitle("within county price dispersion") 

graph export ${data_results}figure-3.pdf, name(hist_unweighted) replace

	*

clear

	*
	
